#clear environment 
rm(list = ls())

library(readr)
library(tidyr)
library(haven) #to read in the Stata data file
require(tigris) #
require(tidycensus) #
require(sp)
require(rgdal) #
require(sf) #
require(raster)
require(stargazer) #
require(readxl)
require(haven)
require(zoo) #== for na.locf
require(snow) #
require(doSNOW) #
require(parallel)
require(foreach)
require(iterators)
require(foreign)
require(abind)
library(dplyr)
library(Rfast)

# set directories
source("./Code/set_directories.R")

# Read in source receptor matrix from Solar Siting, merge with number of systems (from Solar Siting) and Output Matrices by Pollutant
source.receptor <- readRDS(file.path(DATA.SOLAR.SITING,"CEC_to_damages_by_POLL.rds"))
source.receptor = dplyr::left_join(source.receptor, read_csv(file.path(DATA.SOLAR.SITING, "NumberSystems.csv")) %>% dplyr::mutate(zip = sprintf("%05d", zipcode)) %>% dplyr::select(zip, num_systems), by="zip") %>% replace_na(list(num_systems = 0))
source.receptor <- source.receptor %>% relocate(zip, POLL, num_systems, total_damages)#[,c(1,2,3113,3:3112)]

#read in number of rooftops (constraint)
housing.units <- read_dta(file.path(DATA.SOLAR.SITING,"HousingUnits.dta")) %>% dplyr::mutate(zip = sprintf("%05d", zipcode)) %>% dplyr::select(zip, single_units)
#merge with source receptor matrix
source.receptor = left_join(source.receptor,housing.units,by="zip") %>% replace_na(list(single_units = 0))
source.receptor <- source.receptor[,c(1,2,3,3114,4:3113)]

#read in below file to match zips to states
zip.state <- read_csv(file.path(DATA.PATH,"zip_state.csv"),col_types = cols(.default = col_character())) 
#merge state variable with source.receptor matrix
source.receptor <- left_join(source.receptor,zip.state,by="zip") %>% relocate(zip,state,POLL,num_systems,single_units)

co2.source.receptor <- subset(source.receptor,POLL=="CO2")
nox.source.receptor <- subset(source.receptor,POLL=="NOx")
pm25.source.receptor <- subset(source.receptor,POLL=="PM25")
so2.source.receptor <- subset(source.receptor,POLL=="SO2")

#sum nox, pm25, and so2 per system avoided damage matrices to get one per system avoided damage matrix
total.damages.noco2.per.system <- data.frame(cbind(nox.source.receptor$zip,nox.source.receptor$state,nox.source.receptor$num_systems,nox.source.receptor$single_units,nox.source.receptor[,6:3115]+pm25.source.receptor[,6:3115]+so2.source.receptor[,6:3115]))
vars = names(nox.source.receptor[,6:3115])
colnames(total.damages.noco2.per.system) <- c("zip","state","num_systems","single_units",vars)

#sum co2, nox, pm25, and so2 per system avoided damage matrices to get one per system avoided damage matrix
total.damages.wco2.per.system <- data.frame(cbind(nox.source.receptor$zip,nox.source.receptor$state,nox.source.receptor$num_systems,nox.source.receptor$single_units,co2.source.receptor[,6:3115]+nox.source.receptor[,6:3115]+pm25.source.receptor[,6:3115]+so2.source.receptor[,6:3115]))
colnames(total.damages.wco2.per.system) <- c("zip","state","num_systems","single_units",vars)

#read in ACS data aggregated to county level
ACSdata <- read_csv(file.path(DATA.PROCESSED,"ACS2018_countyaggregation.csv"), 
                    col_types = cols(.default = "d"))
ACSdata <- ACSdata[,-1]
ACSdata <- ACSdata %>% mutate(County = sprintf("%05d", County))

#count total number of existing installations (systems)
total.systems <- sum(total.damages.noco2.per.system$num_systems)

#Optimizations #1-6: Put panels in counties/zips with highest shares of non-white households (i.e., lowest shares of white households).
#merge zip-level ACSdata with source-receptor matrix (containing single_units). Rank by metric of interest. 
#Fill zips with panels.
ACSdata_zips <- read_csv(file.path(DATA.PROCESSED,"ACS2018_zipaggregation.csv"), 
                         col_types = cols(.default = "d")) %>%
  select(-X1) %>% mutate(zip = sprintf("%05d", ZCTA5)) %>% select(-ZCTA5) %>% relocate(zip) %>%
  mutate(Zip.POP.nonwhite = Zip.POP - Zip.POP.whitealone) %>%
  mutate(Zip.nonwhite.share = Zip.POP.nonwhite/Zip.POP) %>%
  mutate(bottom1.share = Zip.POP.decile1/Zip.POP) %>%
  mutate(bottom3.pop = Zip.POP.decile1+Zip.POP.decile2+Zip.POP.decile3) %>%
  mutate(bottom3.share = bottom3.pop/Zip.POP) %>%
  mutate(bottom5.pop = Zip.POP.decile1+Zip.POP.decile2+Zip.POP.decile3+Zip.POP.decile4+Zip.POP.decile5) %>%
  mutate(bottom5.share = bottom5.pop/Zip.POP)

#Simulate reallocations. Constraint is 30% of rooftops.
zip.reallocations <- select(total.damages.noco2.per.system,zip,state,num_systems,single_units,total_damages) %>%
  left_join(ACSdata_zips,by="zip") %>%
  mutate(single_units30=floor(0.3*single_units)) %>%
  #1. reallocate to zips according to greatest nonwhite share
  arrange(-Zip.nonwhite.share) %>%
  mutate(cum_units = cumsum(single_units30)) %>%
  mutate(stop.index = min(which(cum_units > total.systems))) %>%
  mutate(nonwhite_share.reallocation = if_else(cum_units <= total.systems,single_units30,0)) %>%
  mutate(cum_systems_assigned = cumsum(nonwhite_share.reallocation)) %>%
  mutate(nonwhite_share.reallocation = if_else(row_number()==stop.index,total.systems-cum_systems_assigned,nonwhite_share.reallocation)) %>%
  select(-c(cum_units,stop.index,cum_systems_assigned)) %>%
  #2. reallocate to zips according to greatest nonwhite population
  arrange(-Zip.POP.nonwhite) %>%
  mutate(cum_units = cumsum(single_units30)) %>%
  mutate(stop.index = min(which(cum_units > total.systems))) %>%
  mutate(nonwhite_pop.reallocation = if_else(cum_units <= total.systems,single_units30,0)) %>%
  mutate(cum_systems_assigned = cumsum(nonwhite_pop.reallocation)) %>%
  mutate(nonwhite_pop.reallocation = if_else(row_number()==stop.index,total.systems-cum_systems_assigned,nonwhite_pop.reallocation)) %>%
  select(-c(cum_units,stop.index,cum_systems_assigned)) %>%
  #3. reallocate to zips according to greatest bottom1 share
  arrange(-bottom1.share) %>%
  mutate(cum_units = cumsum(single_units30)) %>%
  mutate(stop.index = min(which(cum_units > total.systems))) %>%
  mutate(bottom1_share.reallocation = if_else(cum_units <= total.systems,single_units30,0)) %>%
  mutate(cum_systems_assigned = cumsum(bottom1_share.reallocation)) %>%
  mutate(bottom1_share.reallocation = if_else(row_number()==stop.index,total.systems-cum_systems_assigned,bottom1_share.reallocation)) %>%
  select(-c(cum_units,stop.index,cum_systems_assigned)) %>%
  #4. reallocate to zips according to greatest bottom1 pop
  arrange(-Zip.POP.decile1) %>%
  mutate(cum_units = cumsum(single_units30)) %>%
  mutate(stop.index = min(which(cum_units > total.systems))) %>%
  mutate(bottom1_pop.reallocation = if_else(cum_units <= total.systems,single_units30,0)) %>%
  mutate(cum_systems_assigned = cumsum(bottom1_pop.reallocation)) %>%
  mutate(bottom1_pop.reallocation = if_else(row_number()==stop.index,total.systems-cum_systems_assigned,bottom1_pop.reallocation)) %>%
  select(-c(cum_units,stop.index,cum_systems_assigned)) %>%
  #5. reallocate to zips according to greatest bottom3 share
  arrange(-bottom3.share) %>%
  mutate(cum_units = cumsum(single_units30)) %>%
  mutate(stop.index = min(which(cum_units > total.systems))) %>%
  mutate(bottom3_share.reallocation = if_else(cum_units <= total.systems,single_units30,0)) %>%
  mutate(cum_systems_assigned = cumsum(bottom3_share.reallocation)) %>%
  mutate(bottom3_share.reallocation = if_else(row_number()==stop.index,total.systems-cum_systems_assigned,bottom3_share.reallocation)) %>%
  select(-c(cum_units,stop.index,cum_systems_assigned)) %>%
  #6. reallocate to zips according to greatest bottom3 pop
  arrange(-bottom3.pop) %>%
  mutate(cum_units = cumsum(single_units30)) %>%
  mutate(stop.index = min(which(cum_units > total.systems))) %>%
  mutate(bottom3_pop.reallocation = if_else(cum_units <= total.systems,single_units30,0)) %>%
  mutate(cum_systems_assigned = cumsum(bottom3_pop.reallocation)) %>%
  mutate(bottom3_pop.reallocation = if_else(row_number()==stop.index,total.systems-cum_systems_assigned,bottom3_pop.reallocation)) %>%
  select(-c(cum_units,stop.index,cum_systems_assigned)) %>%
  #7. reallocate to zips according to greatest bottom5 share
  arrange(-bottom5.share) %>%
  mutate(cum_units = cumsum(single_units30)) %>%
  mutate(stop.index = min(which(cum_units > total.systems))) %>%
  mutate(bottom5_share.reallocation = if_else(cum_units <= total.systems,single_units30,0)) %>%
  mutate(cum_systems_assigned = cumsum(bottom5_share.reallocation)) %>%
  mutate(bottom5_share.reallocation = if_else(row_number()==stop.index,total.systems-cum_systems_assigned,bottom5_share.reallocation)) %>%
  select(-c(cum_units,stop.index,cum_systems_assigned)) %>%
  #8. reallocate to zips according to greatest bottom5 pop
  arrange(-bottom5.pop) %>%
  mutate(cum_units = cumsum(single_units30)) %>%
  mutate(stop.index = min(which(cum_units > total.systems))) %>%
  mutate(bottom5_pop.reallocation = if_else(cum_units <= total.systems,single_units30,0)) %>%
  mutate(cum_systems_assigned = cumsum(bottom5_pop.reallocation)) %>%
  mutate(bottom5_pop.reallocation = if_else(row_number()==stop.index,total.systems-cum_systems_assigned,bottom5_pop.reallocation)) %>%
  select(-c(cum_units,stop.index,cum_systems_assigned)) %>%
  #drop all remaining variables except reallocations
  select(zip,ends_with(".reallocation"))

#Optimization #7-10: Put panels in zip codes such that they maximize benefits non-white benefits, low-income benefits, etc.

counties <- names(total.damages.noco2.per.system)[6:3114]

transposed.damages <- as.data.frame(base::t(total.damages.noco2.per.system))#[5:3113,]
colnames(transposed.damages) <- total.damages.noco2.per.system$zip
zips <- colnames(transposed.damages)


#write function to create matrices for other objectives
create.objective.SR <- function(objective){
  #options for objective are "nonwhite", "bottom1", "bottom3", "bottom5"
  ACSdata.fun <- ACSdata %>%
    mutate(obj.div.by.POP = ((objective=="nonwhite")*(County.POP-County.POP.whitealone) + 
                               (objective=="bottom1")*(County.POP.decile1) +
                               (objective=="bottom3")*(County.POP.decile1+County.POP.decile2+County.POP.decile3) + 
                               (objective=="bottom5")*(County.POP.decile1+County.POP.decile2+County.POP.decile3+County.POP.decile4+County.POP.decile5)) 
           /County.POP)
  inside1 <- ACSdata.fun %>% select(County,County.POP,obj.div.by.POP) %>%
    left_join(tibble::rownames_to_column(transposed.damages[6:3114,],var="County"),by="County") %>%
    mutate_at(zips,as.numeric)
  inside2 <- inside1 %>%
    mutate_at(zips,~.*inside1$obj.div.by.POP) %>%
    select(-c(County.POP,obj.div.by.POP))
  objective.SR <- as.data.frame(t(inside2))
  colnames(objective.SR) <- objective.SR[1,]
  objective.SR <- objective.SR[-1,]
  objective.SR <- objective.SR %>% mutate(across(everything(),as.numeric))
  #write data to file
  saveRDS(objective.SR,file = file.path(DATA.PROCESSED,paste0(objective,"_SR_stdsize.rds")))
  return(objective.SR)
}

nonwhite.SR1 <- create.objective.SR("nonwhite")
bottom1.SR <- create.objective.SR("bottom1")
bottom3.SR <- create.objective.SR("bottom3")
bottom5.SR <- create.objective.SR("bottom5")

# use four lines below if datasets above are already created (otherwise comment them out)
nonwhite.SR <- read_rds(file.path(WORK.DIRECTORY,"Data/nonwhite_SR_stdsize.rds"))
bottom1.SR <- read_rds(file.path(WORK.DIRECTORY,"Data/bottom1_SR_stdsize.rds"))
bottom3.SR <- read_rds(file.path(WORK.DIRECTORY,"Data/bottom3_SR_stdsize.rds"))
bottom5.SR <- read_rds(file.path(WORK.DIRECTORY,"Data/bottom5_SR_stdsize.rds"))

#Distributional "benefits received" reallocation simulations. Constraint is 30% of rooftops.
output.reallocated.df <- function(objective){
  if(objective=="nonwhite"){
    obj.SR <- nonwhite.SR
  } else if(objective=="bottom1"){
    obj.SR <- bottom1.SR
  } else if(objective=="bottom3"){
    obj.SR <- bottom3.SR
  } else if(objective=="bottom5"){
    obj.SR <- bottom5.SR
  }
  obj.SR.rowsum <- rowSums(obj.SR,na.rm=TRUE)
  obj.SR.rowsum.df <- as.data.frame(cbind(total.damages.noco2.per.system[,c(1,4)],obj.SR.rowsum))
  sorted.obj.SR.rowsum.df <- obj.SR.rowsum.df %>%
    arrange(-obj.SR.rowsum) %>%
    mutate(single_units30 = floor(0.3*single_units)) %>%
    mutate(cum_units = cumsum(single_units30))
  stop.index <- max(which(sorted.obj.SR.rowsum.df$cum_units <= total.systems))
  sorted.obj.SR.rowsum.df$new_systems <- if_else(sorted.obj.SR.rowsum.df$cum_units <= total.systems,sorted.obj.SR.rowsum.df$single_units30,0)
  sorted.obj.SR.rowsum.df$new_systems[stop.index+1] <- total.systems - sorted.obj.SR.rowsum.df$cum_units[stop.index]
  colnames(sorted.obj.SR.rowsum.df)[colnames(sorted.obj.SR.rowsum.df)=="new_systems"] <- paste0("new_systems_",objective)
  colnames(sorted.obj.SR.rowsum.df)[colnames(sorted.obj.SR.rowsum.df)=="obj.SR.rowsum"] <- paste0(objective,".SR.rowsum")
  return(sorted.obj.SR.rowsum.df)
}

dist.reallocations <- select(output.reallocated.df("nonwhite"),zip,new_systems_nonwhite) %>%
  left_join(select(output.reallocated.df("bottom1"),zip,new_systems_bottom1),by="zip") %>%
  left_join(select(output.reallocated.df("bottom3"),zip,new_systems_bottom3),by="zip") %>%
  left_join(select(output.reallocated.df("bottom5"),zip,new_systems_bottom5),by="zip")

sexton.sims.fun <- function(objective){
  #objective can be "interstate", "interstate30", "intrastate", or "intrastate30"
  if(objective=="interstate"){
    obj.df <- select(total.damages.wco2.per.system,zip,state,num_systems,single_units,total_damages) %>%
      arrange(-total_damages) %>%
      mutate(cum_units = cumsum(single_units))
    stop.index <- max(which(obj.df$cum_units <= total.systems))
    obj.df$interstate <- if_else(obj.df$cum_units <= total.systems,obj.df$single_units,0)
    obj.df$interstate[stop.index+1] <- total.systems - obj.df$cum_units[stop.index]
  } else if(objective=="interstate30"){
    obj.df <- select(total.damages.wco2.per.system,zip,state,num_systems,single_units,total_damages) %>%
      arrange(-total_damages) %>%
      mutate(single_units30 = floor(single_units*.3)) %>%
      mutate(cum_units = cumsum(single_units30))
    stop.index <- max(which(obj.df$cum_units <= total.systems))
    obj.df$interstate30 <- if_else(obj.df$cum_units <= total.systems,obj.df$single_units30,0)
    obj.df$interstate30[stop.index+1] <- total.systems - obj.df$cum_units[stop.index]
  } else if(objective=="intrastate"){
    obj.df <- select(total.damages.wco2.per.system,zip,state,num_systems,single_units,total_damages) %>%
      group_by(state) %>%
      arrange(-total_damages,.by_group = TRUE) %>%
      mutate(cum_units = cumsum(single_units)) %>%
      mutate(total_systems_state = sum(num_systems)) %>%
      mutate(index_wi_state = row_number()) %>%
      mutate(stop.index = min(which(cum_units > total_systems_state))) %>%
      mutate(intrastate = if_else(cum_units <= total_systems_state,single_units,0)) %>%
      mutate(cum_systems_assigned = cumsum(intrastate)) %>%
      mutate(intrastate = if_else(index_wi_state==stop.index,total_systems_state-cum_systems_assigned,intrastate)) %>%
      select(-c(stop.index,cum_systems_assigned,index_wi_state,total_systems_state)) %>%
      ungroup()
  } else if(objective=="intrastate30"){
    obj.df <- select(total.damages.wco2.per.system,zip,state,num_systems,single_units,total_damages) %>%
      group_by(state) %>%
      arrange(-total_damages,.by_group = TRUE) %>%
      mutate(single_units30 = floor(single_units*.3)) %>%
      mutate(cum_units = cumsum(single_units30)) %>%
      mutate(total_systems_state = sum(num_systems)) %>%
      mutate(index_wi_state = row_number()) %>%
      mutate(stop.index = min(which(cum_units > total_systems_state))) %>%
      mutate(intrastate30 = if_else(cum_units <= total_systems_state,single_units30,0)) %>%
      mutate(cum_systems_assigned = cumsum(intrastate30)) %>%
      mutate(intrastate30 = if_else(index_wi_state==stop.index,total_systems_state-cum_systems_assigned,intrastate30)) %>%
      select(-c(stop.index,cum_systems_assigned,index_wi_state,total_systems_state)) %>%
      ungroup()
  }
  return(obj.df)
}

sexton.reallocations.replicated <- sexton.sims.fun("interstate") %>%
  select(zip,state,num_systems,interstate) %>%
  left_join(select(sexton.sims.fun("interstate30"),zip,interstate30),by="zip") %>%
  left_join(select(sexton.sims.fun("intrastate"),zip,intrastate),by="zip") %>%
  left_join(select(sexton.sims.fun("intrastate30"),zip,intrastate30),by="zip")



#create a dataframe of all reallocations
reallocations.all <- left_join(sexton.reallocations.replicated,dist.reallocations,by="zip") %>%
  left_join(zip.reallocations,by="zip")
saveRDS(reallocations.all,file.path(DATA.PROCESSED,"reallocations_all_stdsize.rds"))

#now let's find (non-CO2) damages received at the county level for each possible allocation
#merge reallocations.all with per system damages. Then multiply reallocation column by per system damage columns...
counties <- names(total.damages.noco2.per.system)[6:3114]
reallocations.total.benefits.df <- reallocations.all %>%
  left_join(total.damages.noco2.per.system,by="zip") %>%
  select(-c(state.y,num_systems.y)) %>% rename(state=state.x, num_systems=num_systems.x)

reallocation.names <- colnames(reallocations.all)[3:19]
reallocations.total.benefits.df.wco2 <- reallocations.all %>%
  left_join(total.damages.wco2.per.system,by="zip") %>%
  select(-c(state.y,num_systems.y)) %>% rename(state=state.x, num_systems=num_systems.x) %>%
  select(-all_of(counties)) %>%
  mutate(across(all_of(reallocation.names),~.*total_damages)) %>%
  summarize(across(all_of(reallocation.names),sum)) %>%
  rename(original = num_systems) %>%
  select(-c(interstate,intrastate))

original.benefits <- reallocations.total.benefits.df %>%
  mutate(across(all_of(counties),~.*num_systems)) %>% summarize(across(all_of(counties),sum))
interstate.benefits <- reallocations.total.benefits.df %>%
  mutate(across(all_of(counties),~.*interstate30)) %>% summarize(across(all_of(counties),sum))
intrastate.benefits <- reallocations.total.benefits.df %>%
  mutate(across(all_of(counties),~.*intrastate30)) %>% summarize(across(all_of(counties),sum))
nonwhite.benefits <- reallocations.total.benefits.df %>%
  mutate(across(all_of(counties),~.*new_systems_nonwhite)) %>% summarize(across(all_of(counties),sum))
bottom1.benefits <- reallocations.total.benefits.df %>%
  mutate(across(all_of(counties),~.*new_systems_bottom1)) %>% summarize(across(all_of(counties),sum))
bottom3.benefits <- reallocations.total.benefits.df %>%
  mutate(across(all_of(counties),~.*new_systems_bottom3)) %>% summarize(across(all_of(counties),sum))
bottom5.benefits <- reallocations.total.benefits.df %>%
  mutate(across(all_of(counties),~.*new_systems_bottom5)) %>% summarize(across(all_of(counties),sum))

zip_nonwhiteShare.benefits <- reallocations.total.benefits.df %>%
  mutate(across(all_of(counties),~.*nonwhite_share.reallocation)) %>% summarize(across(all_of(counties),sum))
zip_nonwhitePop.benefits <- reallocations.total.benefits.df %>%
  mutate(across(all_of(counties),~.*nonwhite_pop.reallocation)) %>% summarize(across(all_of(counties),sum))
zip_bottom1Share.benefits <- reallocations.total.benefits.df %>%
  mutate(across(all_of(counties),~.*bottom1_share.reallocation)) %>% summarize(across(all_of(counties),sum))
zip_bottom1Pop.benefits <- reallocations.total.benefits.df %>%
  mutate(across(all_of(counties),~.*bottom1_pop.reallocation)) %>% summarize(across(all_of(counties),sum))
zip_bottom3Share.benefits <- reallocations.total.benefits.df %>%
  mutate(across(all_of(counties),~.*bottom3_share.reallocation)) %>% summarize(across(all_of(counties),sum))
zip_bottom3Pop.benefits <- reallocations.total.benefits.df %>%
  mutate(across(all_of(counties),~.*bottom3_pop.reallocation)) %>% summarize(across(all_of(counties),sum))
zip_bottom5Share.benefits <- reallocations.total.benefits.df %>%
  mutate(across(all_of(counties),~.*bottom5_share.reallocation)) %>% summarize(across(all_of(counties),sum))
zip_bottom5Pop.benefits <- reallocations.total.benefits.df %>%
  mutate(across(all_of(counties),~.*bottom5_pop.reallocation)) %>% summarize(across(all_of(counties),sum))
#include these new "zip_XXX" dataframes in the "reallocation.benefits" df below


reallocation.benefits <- as.data.frame(cbind(counties,as.data.frame(t(original.benefits)),as.data.frame(t(interstate.benefits)),
                                             as.data.frame(t(intrastate.benefits)),as.data.frame(t(nonwhite.benefits)),
                                             as.data.frame(t(bottom1.benefits)),as.data.frame(t(bottom3.benefits)),
                                             as.data.frame(t(bottom5.benefits)),as.data.frame(t(zip_nonwhiteShare.benefits)),
                                             as.data.frame(t(zip_nonwhitePop.benefits)),as.data.frame(t(zip_bottom1Share.benefits)),
                                             as.data.frame(t(zip_bottom1Pop.benefits)),as.data.frame(t(zip_bottom3Share.benefits)),
                                             as.data.frame(t(zip_bottom3Pop.benefits)),as.data.frame(t(zip_bottom5Share.benefits)),
                                             as.data.frame(t(zip_bottom5Pop.benefits))))
colnames(reallocation.benefits) <- c("county","original","interstate","intrastate","nonwhite","bottom1","bottom3","bottom5",
                                     "zip_nonwhiteShare","zip_nonwhitePop","zip_bottom1Share","zip_bottom1Pop",
                                     "zip_bottom3Share","zip_bottom3Pop","zip_bottom5Share","zip_bottom5Pop")
saveRDS(reallocation.benefits,file.path(DATA.PROCESSED,"reallocation_benefits30_stdsize.rds"))

total.benefits.reallocation <- colSums(reallocation.benefits[,2:16])

#merge "reallocation.benefits" with block group ACS data to calculate benefits that accrue to different demographics, income deciles, etc.
#load block group ACS data
columns <- c("original","interstate","intrastate","intrastate","nonwhite","bottom1","bottom3","bottom5","zip_nonwhiteShare","zip_nonwhitePop","zip_bottom1Share","zip_bottom1Pop",
             "zip_bottom3Share","zip_bottom3Pop","zip_bottom5Share","zip_bottom5Pop")
ACSdata_bg <- read_csv(file.path(DATA.PROCESSED,"ACS2018_blockgroup.csv"), 
                       col_types = cols(.default = "d", GEOID = "c", GeoLabel = "c"))[,-1] %>%
  select(-Description) %>%
  mutate(CountyFIPS = substr(GEOID,start = 1,stop = 5)) %>%
  group_by(CountyFIPS) %>%
  mutate(pop.county = sum(Number.POP)) %>%
  left_join(reallocation.benefits,by=c("CountyFIPS"="county")) %>%
  mutate(
    across(all_of(columns),~./pop.county,.names = "pc.{col}"),
    across(all_of(columns),~./pop.county*Number.POP,.names = "total.{col}"),
    across(all_of(columns),~./pop.county*Number.POP.black,.names = "black.{col}"),
    across(all_of(columns),~./pop.county*Number.POP.asian,.names = "asian.{col}"),
    across(all_of(columns),~./pop.county*Number.POP.whitealone,.names = "whitealone.{col}"),
    across(all_of(columns),~./pop.county*Number.POP.latino,.names = "latino.{col}")
  ) %>% #this creates per capita benefits, total benefits, total black benefits, etc.
  select(-columns) %>%
  ungroup()

saveRDS(ACSdata_bg,file = file.path(DATA.PROCESSED,"ACSdata_bg_pc_reallocations30_stdsize.rds"))
write.dta(ACSdata_bg,file.path(DATA.PROCESSED,"ACSdata_bg_pc_reallocations30_stdsize.dta"))
#ACSdata_bg <- read_rds(file.path(DATA.PROCESSED,"ACSdata_bg_pc_reallocations30_stdsize.rds"))

#calculate benefits ($million) under each reallocation by race (and total)
ACSdata_bg_race <- ACSdata_bg %>%
  summarize(
    across(starts_with("total"),sum,na.rm=TRUE),
    across(starts_with("black"),sum,na.rm=TRUE),
    across(starts_with("asian"),sum,na.rm=TRUE),
    across(starts_with("whitealone"),sum,na.rm=TRUE),
    across(starts_with("latino"),sum,na.rm=TRUE)
  ) %>%
  pivot_longer(cols=everything(),names_to = c("race", "reallocation"),names_sep="[.]",values_to = "value") %>%
  pivot_wider(names_from="reallocation",values_from="value") %>%
  arrange(match(race,c("black","latino","asian","whitealone","total"))) %>%
  mutate(across(all_of(columns),~./1000000)) %>%
  mutate(across(all_of(columns),round,digits=1))

total.wco2.row <- reallocations.total.benefits.df.wco2
colnames(total.wco2.row) <- colnames(ACSdata_bg_race)[2:16]
total.wco2.row <- total.wco2.row %>%  
  mutate(across(all_of(columns),~./1000000)) %>%
  mutate(across(all_of(columns),round,digits=1)) %>% 
  mutate(race="total.wco2") %>% 
  relocate(race,original)
ACSdata_bg_race <- ACSdata_bg_race %>% bind_rows(total.wco2.row)

#calculate benefits ($million) under each reallocation by income decile (and total)
ACSdata_bg_income <- ACSdata_bg %>%
  group_by(Median.decile) %>%
  summarize(across(starts_with("total"),sum,na.rm=TRUE)) %>%
  mutate(across(!Median.decile,~./1000000)) %>%
  mutate(across(!Median.decile,round,digits=1))

total.row <- ACSdata_bg_race[5,]
colnames(total.row) <- colnames(ACSdata_bg_income)
colnames(total.wco2.row) <- colnames(ACSdata_bg_income)
ACSdata_bg_income <- ACSdata_bg_income %>% mutate(Median.decile=as.character(Median.decile)) %>%bind_rows(total.row) %>% bind_rows(total.wco2.row)

#print reallocation outcomes by race to table (.tex file)
ACS_bg_race_for_tex <- ACSdata_bg_race %>%
  select(race,original,interstate,nonwhite,bottom1,zip_nonwhiteShare,zip_bottom1Share) %>%
  rename(Race=race, Original=original, "Max Total Benefits"=interstate, "Max People of Color"=nonwhite, 
         "Max Bottom Decile"=bottom1, "Zips of Color"=zip_nonwhiteShare, "Bottom Decile Zips"=zip_bottom1Share)

byLine <- do.call("c", strsplit(capture.output(stargazer(ACS_bg_race_for_tex, summary=FALSE, rownames=FALSE, float=F)),"\n"))
result <- append(byLine,c(" & \\multicolumn{6}{c}{Reallocation} \\\\ \\cmidrule{2-7}"), after = c(4))
result <- sub(" ccccccc", " lcccccc", result)
result <- append(result,"\\midrule",after = c(10))
result <- append(result, "\\resizebox{\\columnwidth}{!}{",after = c(2))
result <- append(result,"}",after = c(16))
cat(paste(result, collapse = "\n"),file = file.path(TABLES.OUT,"reallocations_races30_stdsize.tex"))

#print reallocation outcomes by income to table (.tex file)
ACS_bg_income_for_tex <- ACSdata_bg_income %>%
  select(Median.decile,total.original,total.interstate,total.nonwhite,total.bottom1,total.zip_nonwhiteShare,total.zip_bottom1Share) %>%
  rename(Decile=Median.decile, Original=total.original, "Max Total Benefits"=total.interstate, "Max People of Color"=total.nonwhite, 
         "Max Bottom Decile"=total.bottom1, "Zips of Color"=total.zip_nonwhiteShare, "Bottom Decile Zips"=total.zip_bottom1Share) %>%
  filter(!is.na(Decile))

byLine <- do.call("c", strsplit(capture.output(stargazer(ACS_bg_income_for_tex, summary=FALSE, rownames=FALSE, float=F)),"\n"))
result <- append(byLine,c(" & \\multicolumn{6}{c}{Reallocation} \\\\ \\cmidrule{2-7}"), after = c(4))
result <- sub(" ccccccc", " lcccccc", result)
result <- append(result,"\\midrule",after = c(16))
result <- append(result, "\\resizebox{\\columnwidth}{!}{",after = c(2))
result <- append(result,"}",after = c(22))
cat(paste(result, collapse = "\n"),file = file.path(TABLES.OUT,"reallocations_deciles30_stdsize.tex"))

ACS_bg_income_shares_for_tex <- ACS_bg_income_for_tex %>% 
  mutate(original_share = Original/Original[11]) %>%
  mutate(interstate_share = `Max Total Benefits`/`Max Total Benefits`[11])

#count number of zip codes with positive num of systems under reallocations
sum(reallocations.all$num_systems>0)
sum(reallocations.all$interstate30>0)
sum(reallocations.all$new_systems_nonwhite>0)
sum(reallocations.all$new_systems_bottom1>0)
sum(reallocations.all$interstate30!=reallocations.all$new_systems_nonwhite)
sum(reallocations.all$interstate30!=reallocations.all$new_systems_bottom1)
#calculate weighted average per capita benefits under original and reallocations
sum(ACSdata_bg$pc.original*ACSdata_bg$Number.POP,na.rm=TRUE)/sum(ACSdata_bg$Number.POP,na.rm=TRUE)
sum(ACSdata_bg$pc.interstate*ACSdata_bg$Number.POP,na.rm=TRUE)/sum(ACSdata_bg$Number.POP,na.rm=TRUE)
#calculate num systems by state
systems.state <- reallocations.all %>%
  group_by(state) %>%
  select(-zip) %>%
  summarize(across(everything(),sum)) %>%
  mutate(share_systems = num_systems/sum(num_systems)) %>%
  relocate(state,num_systems,share_systems)
south.list <- c("TX","OK","AR","LA","MS","AL","GA","FL","SC","TN","KY","NC","VA","WV","MD","DE")
bottom1.zip.reallocation.south <- sum(systems.state$bottom1_share.reallocation[systems.state$state %in% south.list])/sum(systems.state$bottom1_share.reallocation)


#maps
library(maptools)
US_zips <- readRDS(file.path(DATA.PATH, "USA_Zip_Code_Boundaries_v104_zip_poly.rds"))
US_zips <- merge(US_zips, reallocations.all, by.x = "ZIP_CODE", by.y = "zip")
dots.original <- dotsInPolys(US_zips, as.integer(US_zips$num_systems/10), compatible=FALSE)  # turn shapefile into dot density map (which is a SpatialPointsDataFrame)
dots.interstate <- dotsInPolys(US_zips, as.integer(US_zips$interstate/10), compatible=FALSE)  # turn shapefile into dot density map (which is a SpatialPointsDataFrame)
dots.interstate30 <- dotsInPolys(US_zips, as.integer(US_zips$interstate30/10), compatible=FALSE)  # turn shapefile into dot density map (which is a SpatialPointsDataFrame)
dots.new_systems_nonwhite <- dotsInPolys(US_zips, as.integer(US_zips$new_systems_nonwhite/10), compatible=FALSE)  # turn shapefile into dot density map (which is a SpatialPointsDataFrame)
dots.new_systems_bottom1 <- dotsInPolys(US_zips, as.integer(US_zips$new_systems_bottom1/10), compatible=FALSE)  # turn shapefile into dot density map (which is a SpatialPointsDataFrame)
dots.nonwhite_share.reallocation <- dotsInPolys(US_zips, as.integer(US_zips$nonwhite_share.reallocation/10), compatible=FALSE)  # turn shapefile into dot density map (which is a SpatialPointsDataFrame)
dots.bottom1_share.reallocation <- dotsInPolys(US_zips, as.integer(US_zips$bottom1_share.reallocation/10), compatible=FALSE)  # turn shapefile into dot density map (which is a SpatialPointsDataFrame)

dots.original$simulation <- "Installed"
#dots.interstate$simulation <- "Interstate"
dots.interstate30$simulation <- "Max Total Benefits"
dots.new_systems_nonwhite$simulation <- "Max People of Color"
dots.new_systems_bottom1$simulation <- "Max Bottom Decile"
dots.nonwhite_share.reallocation$simulation <- "Zips of Color"
dots.bottom1_share.reallocation$simulation <- "Bottom Decile Zips"

#dots.all <- rbind(dots.original, dots.interstate, dots.new_systems_nonwhite)
dots.original.interstate <- rbind(dots.original, dots.interstate30)
proj_zip <- proj4string(US_zips)
proj4string(dots.original.interstate) <- proj_zip
dots.original.nonwhite <- rbind(dots.original, dots.new_systems_nonwhite)
proj4string(dots.original.nonwhite) <- proj_zip
dots.original.bottom1 <- rbind(dots.original, dots.new_systems_bottom1)
proj4string(dots.original.bottom1) <- proj_zip
dots.original.nonwhite_share <- rbind(dots.original, dots.nonwhite_share.reallocation)
proj4string(dots.original.nonwhite_share) <- proj_zip
dots.original.bottom1_share <- rbind(dots.original, dots.bottom1_share.reallocation)
proj4string(dots.original.bottom1_share) <- proj_zip

US_zips2 = st_as_sf(readRDS(file.path(DATA.PATH, "USA_Zip_Code_Boundaries_v104_zip_poly.rds")))
US_zips2 = group_by(US_zips2, STATE)
US_zips2 = US_zips2 %>% st_buffer(0)
US_zips2 = US_zips2 %>% group_by(STATE) %>% summarize() 
US_states = as_Spatial(US_zips2)
library(rgeos)
US_states <- gSimplify(US_states, tol=0.01)
state.layer <- list("sp.polygons", US_states)
my.palette <- c("red", "blue")
point.size <- 0.4
dots.original.interstate$simulation <- as.factor(dots.original.interstate$simulation)
dots.original.nonwhite$simulation <- as.factor(dots.original.nonwhite$simulation)
dots.original.bottom1$simulation <- as.factor(dots.original.bottom1$simulation)
dots.original.nonwhite_share$simulation <- as.factor(dots.original.nonwhite_share$simulation)
dots.original.bottom1_share$simulation <- factor(dots.original.bottom1_share$simulation,levels=c("Installed","Bottom Decile Zips"))

map.interstate = spplot(dots.original.interstate, "simulation", sp.layout = state.layer, col.regions = my.palette, cex = point.size, alpha=0.2, key.space = "right" , xlim=c(-124.74, -66.951), ylim=c(24.56, 49.39), par.settings = list(axis.line = list(col = 'transparent')))
map.nonwhite = spplot(dots.original.nonwhite, "simulation", sp.layout = state.layer, col.regions = my.palette, cex = point.size, alpha=0.2, key.space = "right" , xlim=c(-124.74, -66.951), ylim=c(24.56, 49.39), par.settings = list(axis.line = list(col = 'transparent')))
map.bottom1 = spplot(dots.original.bottom1, "simulation", sp.layout = state.layer, col.regions = my.palette, cex = point.size, alpha=0.2, key.space = "right" , xlim=c(-124.74, -66.951), ylim=c(24.56, 49.39), par.settings = list(axis.line = list(col = 'transparent')))
map.nonwhite_share = spplot(dots.original.nonwhite_share, "simulation", sp.layout = state.layer, col.regions = my.palette, cex = point.size, alpha=0.2, key.space = "right" , xlim=c(-124.74, -66.951), ylim=c(24.56, 49.39), par.settings = list(axis.line = list(col = 'transparent')))
map.bottom1_share = spplot(dots.original.bottom1_share, "simulation", sp.layout = state.layer, col.regions = my.palette, cex = point.size, alpha=0.2, key.space = "right" , xlim=c(-124.74, -66.951), ylim=c(24.56, 49.39), par.settings = list(axis.line = list(col = 'transparent')))

#Save maps as PDFs
pdf(file.path(FIGURES.OUT, 'Reallocation_CurrentSystems+Interstate_stdsize.pdf'), width=8, height=4)
map.interstate
dev.off()
pdf(file.path(FIGURES.OUT, 'Reallocation_CurrentSystems+Nonwhite_stdsize.pdf'), width=8, height=4)
map.nonwhite
dev.off()
pdf(file.path(FIGURES.OUT, 'Reallocation_CurrentSystems+Bottom1_stdsize.pdf'), width=8, height=4)
map.bottom1
dev.off()
pdf(file.path(FIGURES.OUT, 'Reallocation_CurrentSystems+Nonwhite_Share_stdsize.pdf'), width=8, height=4)
map.nonwhite_share
dev.off()
pdf(file.path(FIGURES.OUT, 'Reallocation_CurrentSystems+Bottom1_Share_stdsize.pdf'), width=8, height=4)
map.bottom1_share
dev.off()
